// Copyright 2022 The spiderman-bilibili Authors. All rights reserved.

// Project: spiderman-bilibili
// IDE: GoLand
// Author: caihe.fu@qq.com
// File: user_list.go
// Date: 2022/8/17 11:24
// Description: 哔哩哔哩 - 用户列表数据

package main

import (
	"encoding/csv"
	"fmt"
	"os"
	"time"

	"github.com/gocolly/colly/v2"
	"github.com/gocolly/colly/v2/extensions"
	"github.com/tidwall/gjson"
)

// spiderUserList crawls the video list of one hard-coded bilibili account
// and stores one CSV row per video in "<name>_<unix-timestamp>.csv" in the
// current working directory.
//
// It requests up to pageAll pages from the space/arc/search API, pausing
// between requests, and stops early once a response carries no videos or a
// row can no longer be written to the CSV file.
//
// If the output file cannot be opened the process exits with status 1. Any
// later failure while preparing the file (BOM or header row) is reported on
// stderr and the function returns without crawling.
func spiderUserList() {
	// https://space.bilibili.com/280793434/video
	mid := "280793434" // account ID
	name := "手工耿"      // account name, used for the file name and the first CSV column

	// Create the output CSV file. O_TRUNC guarantees that a pre-existing file
	// with the same name cannot leave stale bytes behind the new content.
	csvPath := fmt.Sprintf("%s_%d.csv", name, time.Now().Unix())
	f, err := os.OpenFile(csvPath, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		// Nothing has been deferred yet, so exiting here skips no cleanup.
		os.Exit(1)
	}
	defer func() {
		// A failed close can mean the data never reached the disk, so report it.
		if err := f.Close(); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}()

	// Write a UTF-8 BOM so the Chinese text is not displayed as mojibake.
	if _, err := f.WriteString("\xEF\xBB\xBF"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	w := csv.NewWriter(f)

	// writeRow appends one record and flushes immediately so that rows already
	// collected are on disk even if the crawl is interrupted later. Flush
	// itself returns nothing; its failure is only visible through w.Error().
	writeRow := func(record []string) error {
		if err := w.Write(record); err != nil {
			return err
		}
		w.Flush()
		return w.Error()
	}

	// Header row.
	if err := writeRow([]string{"账号名称", "作者", "标题", "发布时间", "时长", "评论量", "播放量", "链接"}); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Create a colly collector.
	c := colly.NewCollector()

	// Allow the same URL to be visited more than once.
	c.AllowURLRevisit = true

	// Request timeout.
	c.SetRequestTimeout(15 * time.Second)

	// Set the Referer header automatically.
	extensions.Referer(c)

	// Use a random User-Agent.
	extensions.RandomUserAgent(c)

	// Set by the response callback when the page loop below should stop:
	// either there is no more data or the CSV file can no longer be written.
	isBreak := false

	c.OnResponse(func(resp *colly.Response) {
		// Current page number.
		pageNum := gjson.GetBytes(resp.Body, "data.page.pn").Int()

		// Total number of entries.
		dataCount := gjson.GetBytes(resp.Body, "data.page.count").Int()

		fmt.Println("page_num ->", pageNum)
		fmt.Println("data_count ->", dataCount)

		// Array of video entries.
		videoList := gjson.GetBytes(resp.Body, "data.list.vlist").Array()

		if len(videoList) == 0 {
			isBreak = true
		}

		for _, video := range videoList {
			title := video.Get("title").String()      // title
			author := video.Get("author").String()    // author
			comment := video.Get("comment").String()  // comment count
			play := video.Get("play").String()        // play count
			created := video.Get("created").Int()     // creation time, interpreted as Unix seconds below
			videoTime := video.Get("length").String() // video duration
			bvID := video.Get("bvid").String()        // video ID
			link := "https://www.bilibili.com/video/" + bvID

			// Formatted in the local time zone of the machine running the crawl.
			releaseTxt := time.Unix(created, 0).Format("2006-01-02 15:04:05")

			fmt.Println("title ->", title)
			fmt.Println("author ->", author)
			fmt.Println("comment ->", comment)
			fmt.Println("play ->", play)
			fmt.Println("create ->", created)
			fmt.Println("video_time ->", videoTime)
			fmt.Println("link ->", link)

			fmt.Println("---------------")

			// Append the row; the column order matches the header row.
			err := writeRow([]string{
				name,       // account name
				author,     // author
				title,      // title
				releaseTxt, // release time
				videoTime,  // duration
				comment,    // comment count
				play,       // play count
				link,       // link
			})
			if err != nil {
				// Fetching more pages is pointless when rows cannot be stored.
				fmt.Fprintln(os.Stderr, err)
				isBreak = true
				return
			}
		}
	})

	pageAll := 3 // maximum number of pages to crawl
	for i := 0; i < pageAll; i++ {
		// https://api.bilibili.com/x/space/arc/search?mid=280793434&ps=30&tid=0&pn=1&keyword=&order=pubdate&jsonp=jsonp
		vURL := fmt.Sprintf("https://api.bilibili.com/x/space/arc/search?mid=%s&ps=30&tid=0&pn=%d&keyword=&order=pubdate&jsonp=jsonp", mid, i+1)
		if err := c.Visit(vURL); err != nil {
			// A failed page is reported and skipped; later pages are still tried.
			fmt.Println(err)
		}
		fmt.Printf("========== page %d done ==========\n", i+1)

		if isBreak {
			break
		}

		// Sleep 5 seconds between requests to avoid being blocked; use proxy
		// IPs if faster crawling is needed. No sleep is needed after the final
		// page because no request follows it.
		if i+1 < pageAll {
			time.Sleep(5 * time.Second)
		}
	}
}
